start kernel part VII
initialize I/O APIC mappings
The IOAPIC provides multi-processor interrupt management and incorporates both static and dynamic symmetric interrupt distribution across all processors. In systems with multiple I/O subsystems, each subsystem can have its own set of interrupts.
ioapic_init_mappings
Set up a resource for every I/O APIC with the routine ioapic_setup_resources.
4169 ioapic_res = ioapic_setup_resources(nr_ioapics);
(gdb) s
ioapic_setup_resources (nr_ioapics=1) at arch/x86/kernel/apic/io_apic.c:4140
4140 if (nr_ioapics <= 0)
(gdb) n
4146 mem = alloc_bootmem(n);
(gdb)
4144 n *= nr_ioapics;
(gdb)
4146 mem = alloc_bootmem(n);
(gdb)
4149 mem += sizeof(struct resource) * nr_ioapics;
(gdb)
4151 for (i = 0; i < nr_ioapics; i++) {
(gdb)
4152 res[i].name = mem;
(gdb)
4153 res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
(gdb)
4154 sprintf(mem, "IOAPIC %u", i);
(gdb)
4155 mem += IOAPIC_RESOURCE_NAME_SIZE;
(gdb)
4151 for (i = 0; i < nr_ioapics; i++) {
(gdb)
4158 ioapic_resources = res;
After resource setup completes, the register area of each I/O APIC is mapped and the corresponding resource range is filled in.
ioapic_init_mappings () at arch/x86/kernel/apic/io_apic.c:4170
4170 for (i = 0; i < nr_ioapics; i++) {
(gdb)
4171 if (smp_found_config) {
(gdb)
4172 ioapic_phys = mp_ioapics[i].apicaddr;
(gdb)
4174 if (!ioapic_phys) {
(gdb)
4192 set_fixmap_nocache(idx, ioapic_phys);
(gdb)
4193 apic_printk(APIC_VERBOSE,
(gdb)
4198 ioapic_res->start = ioapic_phys;
(gdb)
4199 ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
(gdb)
4200 ioapic_res++;
(gdb)
4202 }
probe number of GSIs
ACPI uses a cookie system to “name” interrupts known as Global System Interrupts. Each interrupt controller input pin is assigned a GSI using a fairly simple scheme. For the 8259A case, the GSIs map directly to ISA IRQs. Thus, IRQ 0 is GSI 0, etc. The APIC case is slightly more complicated, but still simple. Each I/O APIC is assigned a base GSI by the BIOS. Each input pin on the I/O APIC is mapped to a GSI number by adding the pin number (zero-based) to the base GSI. Thus, if an I/O APIC has a base GSI of N, pin 0 on that I/O APIC has a GSI of N, pin 1 has a GSI of N + 1, etc. The I/O APIC with a base GSI of 0 maps the ISA IRQs onto its first 16 input pins. Thus, the ISA IRQs are effectively always mapped 1:1 onto GSIs. More details about GSIs can be found in Section 5.2.11 of the ACPI 2.0c spec.
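To make the pin-to-GSI arithmetic concrete, here is a minimal userspace sketch; the ioapic_desc struct and pin_to_gsi helper are hypothetical stand-ins for illustration, not kernel code.

#include <stdio.h>

/* Hypothetical descriptor: the base GSI assigned by the BIOS and the
 * number of input pins of one I/O APIC. */
struct ioapic_desc {
        unsigned int gsi_base;
        unsigned int nr_pins;
};

/* A pin's GSI is simply the I/O APIC's base GSI plus the pin number. */
static unsigned int pin_to_gsi(const struct ioapic_desc *io, unsigned int pin)
{
        return io->gsi_base + pin;
}

int main(void)
{
        struct ioapic_desc io = { .gsi_base = 0, .nr_pins = 24 };
        unsigned int pin;

        /* With a base GSI of 0, the first 16 pins map 1:1 onto the ISA IRQs. */
        for (pin = 0; pin < 16; pin++)
                printf("pin %2u -> GSI %2u\n", pin, pin_to_gsi(&io, pin));
        return 0;
}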
probe_nr_irqs_gsi involves acpi_probe_gsi to get the number of GSIs and updates nr_irqs_gsi if the result is bigger than its current value.
reserve e820 areas as busy resources
e820_reserve_resources allocates memory for e820_res and initializes a resource for every e820 entry, inserting it into the resource tree as busy if the area isn't of reserved type or starts below 1MB. Finally, every region of the saved e820 map is added to the firmware memory map.
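The decision made for each entry can be sketched in a few lines of userspace C (simplified types and made-up entries; the condition mirrors the check at e820.c:1359). The gdb trace of the real function follows.

#include <stdio.h>
#include <stdint.h>

/* Simplified stand-ins for the real e820 types. */
#define E820_RAM      1
#define E820_RESERVED 2

struct e820_entry {
        uint64_t addr;
        uint64_t size;
        uint32_t type;
};

/* A resource is marked busy and inserted unless the entry is a reserved
 * area that starts at or above 1MB. */
static int should_insert(const struct e820_entry *e)
{
        return e->type != E820_RESERVED || e->addr < (1ULL << 20);
}

int main(void)
{
        struct e820_entry entries[] = {
                { 0x00000000, 0x0009fc00, E820_RAM },      /* low RAM            */
                { 0x000f0000, 0x00010000, E820_RESERVED }, /* reserved below 1MB */
                { 0xfffc0000, 0x00040000, E820_RESERVED }, /* reserved above 1MB */
        };
        int i;

        for (i = 0; i < 3; i++)
                printf("entry %d: %s\n", i,
                       should_insert(&entries[i]) ? "insert as busy" : "skip");
        return 0;
}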
Breakpoint 2, e820_reserve_resources () at arch/x86/kernel/e820.c:1335
1335 {
(gdb) n
1340 res = alloc_bootmem(sizeof(struct resource) * e820.nr_map);
(gdb)
1335 {
(gdb)
1340 res = alloc_bootmem(sizeof(struct resource) * e820.nr_map);
(gdb)
1342 for (i = 0; i < e820.nr_map; i++) {
(gdb)
1340 res = alloc_bootmem(sizeof(struct resource) * e820.nr_map);
(gdb)
1341 e820_res = res;
(gdb)
1342 for (i = 0; i < e820.nr_map; i++) {
(gdb)
1343 end = e820.map[i].addr + e820.map[i].size - 1;
(gdb)
1348 res->name = e820_type_to_string(e820.map[i].type);
(gdb)
1349 res->start = e820.map[i].addr;
(gdb)
1350 res->end = end;
(gdb)
1352 res->flags = IORESOURCE_MEM;
(gdb)
1359 if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20)) {
(gdb)
1360 res->flags |= IORESOURCE_BUSY;
(gdb)
1361 insert_resource(&iomem_resource, res);
(gdb)
1363 res++;
(gdb)
1342 for (i = 0; i < e820.nr_map; i++) {
(gdb) break if i==7
Breakpoint 3 at 0xc17006bc: file arch/x86/kernel/e820.c, line 1342.
(gdb) c
Continuing.
Breakpoint 3, e820_reserve_resources () at arch/x86/kernel/e820.c:1342
1342 for (i = 0; i < e820.nr_map; i++) {
(gdb) n
1366 for (i = 0; i < e820_saved.nr_map; i++) {
(gdb) p e820_saved.nr_map
$1 = 6
(gdb) p e820.nr_map
$2 = 8
(gdb) n
1368 firmware_map_add_early(entry->addr,
(gdb) s
e820_type_to_string (e820_type=1) at arch/x86/kernel/e820.c:1320
1320 switch (e820_type) {
(gdb) n
1322 case E820_RAM: return "System RAM";
(gdb)
e820_reserve_resources () at arch/x86/kernel/e820.c:1369
1368 firmware_map_add_early(entry->addr,
(gdb) s
firmware_map_add_early (start=0, end=654335, type=0xc15cfb9b "System RAM")
at drivers/firmware/memmap.c:164
164 {
(gdb) n
167 entry = alloc_bootmem(sizeof(struct firmware_map_entry));
(gdb)
168 if (WARN_ON(!entry))
(gdb)
171 return firmware_map_add_entry(start, end, type, entry);
(gdb) s
firmware_map_add_entry (entry=0xc2126aa0, type=0xc15cfb9b "System RAM",
end=654335, start=0) at drivers/firmware/memmap.c:112
112 BUG_ON(start > end);
(gdb) n
171 return firmware_map_add_entry(start, end, type, entry);
(gdb) s
172 }
(gdb)
e820_reserve_resources () at arch/x86/kernel/e820.c:1366
1366 for (i = 0; i < e820_saved.nr_map; i++) {
mark pages that don't correspond to e820 RAM areas as nosave
e820_mark_nosave_regions requires the e820 map to be sorted without any overlapping entries and assumes the first area to be RAM. It involves register_nosave_region to add each such memory region to the nosave list.
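The idea can be sketched in userspace as follows (a simplification that uses addresses instead of page frame numbers, with a made-up map): every hole between areas and every non-RAM area becomes a nosave region.

#include <stdio.h>
#include <stdint.h>

#define E820_RAM      1
#define E820_RESERVED 2

struct e820_entry { uint64_t addr, size; int type; };

/* Stand-in for register_nosave_region(): just print the range here. */
static void nosave(uint64_t start, uint64_t end)
{
        printf("nosave region: 0x%llx - 0x%llx\n",
               (unsigned long long)start, (unsigned long long)end);
}

int main(void)
{
        /* Sorted, non-overlapping map whose first area is RAM -- the
         * assumptions e820_mark_nosave_regions relies on. */
        struct e820_entry map[] = {
                { 0x0000000, 0x009f000, E820_RAM },
                { 0x00f0000, 0x0010000, E820_RESERVED },
                { 0x0100000, 0x7f00000, E820_RAM },
        };
        uint64_t last_end = map[0].addr + map[0].size;
        int i;

        for (i = 1; i < 3; i++) {
                if (last_end < map[i].addr)           /* hole between areas  */
                        nosave(last_end, map[i].addr);
                last_end = map[i].addr + map[i].size;
                if (map[i].type != E820_RAM)          /* non-RAM area itself */
                        nosave(map[i].addr, last_end);
        }
        return 0;
}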
reserve resources
32-bit specific setup functions are initialized in i386_default_early_setup. i386_reserve_resources is the reserve-resources callback in x86_init; it reserves the video RAM resource with the function request_resource.
/**
* request_resource - request and reserve an I/O or memory resource
* @root: root resource descriptor
* @new: resource descriptor desired by caller
*
* Returns 0 for success, negative error code on error.
*/
int request_resource(struct resource *root, struct resource *new)
{
        struct resource *conflict;

        write_lock(&resource_lock);
        conflict = __request_resource(root, new);
        write_unlock(&resource_lock);
        return conflict ? -EBUSY : 0;
}
request_resource involves __request_resource; the full debug session is as follows:
203 conflict = __request_resource(root, new);
(gdb) s
__request_resource (root=root@entry=0xc16997c0 <iomem_resource>,
new=new@entry=0xc16944e0 <video_ram_resource>) at kernel/resource.c:147
147 resource_size_t end = new->end;
(gdb) p new
$1 = (struct resource *) 0xc16944e0 <video_ram_resource>
(gdb) p *new
$2 = {start = 655360, end = 786431, name = 0xc15cf9eb "Video RAM area",
flags = 2147484160, parent = 0x0, sibling = 0x0, child = 0x0}
(gdb) p /x *new
$3 = {start = 0xa0000, end = 0xbffff, name = 0xc15cf9eb, flags = 0x80000200,
parent = 0x0, sibling = 0x0, child = 0x0}
(gdb) p root
$4 = (struct resource *) 0xc16997c0 <iomem_resource>
(gdb) p *root
$5 = {start = 0, end = 18446744073709551615, name = 0xc15d46e7 "PCI mem",
flags = 512, parent = 0x0, sibling = 0x0, child = 0xc2126980}
(gdb) p /x *root
$6 = {start = 0x0, end = 0xffffffffffffffff, name = 0xc15d46e7, flags = 0x200,
parent = 0x0, sibling = 0x0, child = 0xc2126980}
(gdb) n
146 resource_size_t start = new->start;
(gdb)
147 resource_size_t end = new->end;
(gdb)
150 if (end < start)
(gdb)
152 if (start < root->start)
(gdb)
154 if (end > root->end)
(gdb)
156 p = &root->child;
(gdb)
158 tmp = *p;
(gdb)
159 if (!tmp || tmp->start > end) {
(gdb) p tmp
$7 = (struct resource *) 0xc2126980
(gdb) n
165 p = &tmp->sibling;
(gdb)
166 if (tmp->end < start)
(gdb)
158 tmp = *p;
(gdb)
159 if (!tmp || tmp->start > end) {
(gdb) p tmp
$8 = (struct resource *) 0xc21269a4
(gdb) n
165 p = &tmp->sibling;
(gdb)
166 if (tmp->end < start)
(gdb)
158 tmp = *p;
(gdb)
159 if (!tmp || tmp->start > end) {
(gdb)
165 p = &tmp->sibling;
(gdb)
166 if (tmp->end < start)
(gdb)
158 tmp = *p;
(gdb)
159 if (!tmp || tmp->start > end) {
(gdb)
165 p = &tmp->sibling;
(gdb)
166 if (tmp->end < start)
(gdb)
158 tmp = *p;
(gdb)
159 if (!tmp || tmp->start > end) {
(gdb)
160 new->sibling = tmp;
(gdb)
161 *p = new;
(gdb)
162 new->parent = root;
(gdb)
163 return NULL;
(gdb) p tmp
$9 = (struct resource *) 0xc1694900 <video_rom_resource>
(gdb) p *tmp
$10 = {start = 786432, end = 822783, name = 0xc15cfa8e "Video ROM",
flags = 2147492352, parent = 0xc16997c0 <iomem_resource>,
sibling = 0xc1694940 <adapter_rom_resources>, child = 0x0}
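To summarize the walk above: __request_resource keeps a parent's children on a singly linked sibling list sorted by start address, scans for the first child that starts beyond the new range, rejects any overlap, and splices the new resource in. Here is a compact userspace sketch of the same insertion logic (simplified resource struct, no locking, illustrative ranges only):

#include <stdio.h>
#include <stdint.h>

struct resource {
        uint64_t start, end;
        const char *name;
        struct resource *parent, *sibling, *child;
};

/* Same shape as __request_resource(): returns the conflicting resource,
 * or NULL when the new range was inserted into root's sorted child list. */
static struct resource *insert_sorted(struct resource *root, struct resource *new)
{
        struct resource **p = &root->child;

        if (new->end < new->start || new->start < root->start ||
            new->end > root->end)
                return root;

        for (;;) {
                struct resource *tmp = *p;

                if (!tmp || tmp->start > new->end) {   /* found the slot    */
                        new->sibling = tmp;
                        *p = new;
                        new->parent = root;
                        return NULL;
                }
                p = &tmp->sibling;
                if (tmp->end < new->start)             /* ends before us    */
                        continue;
                return tmp;                            /* overlap: conflict */
        }
}

int main(void)
{
        struct resource root = { 0, ~0ULL, "iomem", NULL, NULL, NULL };
        struct resource ram  = { 0x000000, 0x09ffff, "System RAM" };
        struct resource vram = { 0x0a0000, 0x0bffff, "Video RAM area" };

        printf("ram:  %s\n", insert_sorted(&root, &ram)  ? "conflict" : "ok");
        printf("vram: %s\n", insert_sorted(&root, &vram) ? "conflict" : "ok");
        return 0;
}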
After the video RAM resource has been reserved, the standard I/O resources are reserved with the function reserve_standard_io_resources.
search biggest gap in e820 memory space and pass the result to PCI to assign MMIO resources
e820_setup_gap involves e820_search_gap to search for the gap, starting with a default gap start of 0x10000000 and a minimum size of 0x400000.
The result of the gap search is as follows:
(gdb) p /x *gapstart
$21 = 0x8000000
(gdb) p /x *gapsize
$22 = 0xf7fc0000
Finally the start address of the gap is saved to pci_mem_start.
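The search can be pictured as walking the e820 map from the highest entry downwards and remembering the largest hole between consecutive areas. Below is a userspace sketch with a made-up two-entry map, chosen so that it yields the same gap as the trace above; the real e820_search_gap works on the full map.

#include <stdio.h>
#include <stdint.h>

struct e820_entry { uint64_t addr, size; };

/* Walk the map top-down and remember the largest hole below 4GB. */
static int search_gap(const struct e820_entry *map, int n,
                      uint64_t *gapstart, uint64_t *gapsize)
{
        uint64_t last = 0xffffffffULL;
        int found = 0;
        int i;

        for (i = n - 1; i >= 0; i--) {
                uint64_t start = map[i].addr;
                uint64_t end = start + map[i].size;

                if (last > end && last - end >= *gapsize) {
                        *gapsize = last - end;   /* biggest hole so far */
                        *gapstart = end;
                        found = 1;
                }
                if (start < last)
                        last = start;
        }
        return found;
}

int main(void)
{
        /* Illustrative map: 128MB of RAM, plus a firmware area near 4GB. */
        struct e820_entry map[] = {
                { 0x00000000, 0x08000000 },
                { 0xfffc0000, 0x00040000 },
        };
        uint64_t gapstart = 0x10000000, gapsize = 0x400000; /* defaults */

        if (search_gap(map, 2, &gapstart, &gapsize))
                printf("gap at 0x%llx, size 0x%llx\n",
                       (unsigned long long)gapstart,
                       (unsigned long long)gapsize);
        return 0;
}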
save the initial thermal LVT value
mcheck_intel_therm_init checks whether the CPU has the ACPI and ACC features, and saves the initial thermal LVT value if it does.
1058 mcheck_intel_therm_init();
(gdb) s
mcheck_intel_therm_init () at arch/x86/kernel/cpu/mcheck/therm_throt.c:260
260 {
(gdb) n
266 if (cpu_has(&boot_cpu_data, X86_FEATURE_ACPI) &&
(gdb)
269 }
The setup_arch routine ends here; let's continue with start_kernel.
store untouched and touched command line
/*
* We need to store the untouched command line for future reference.
* We also need to store the touched command line since the parameter
* parsing is performed in place, and we should allow a component to
* store reference of name/value for future reference.
*/
static void __init setup_command_line(char *command_line)
{
        saved_command_line = alloc_bootmem(strlen (boot_command_line)+1);
        static_command_line = alloc_bootmem(strlen (command_line)+1);
        strcpy (saved_command_line, boot_command_line);
        strcpy (static_command_line, command_line);
}
setup_command_line allocates memory for the saved and static command lines and copies the command line into the allocated memory.
set number of CPU ids
/* An arch may set nr_cpu_ids earlier if needed, so this would be redundant */
static void __init setup_nr_cpu_ids(void)
{
        nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1;
}
setup_nr_cpu_ids derives the number of CPU ids from the possible-CPU mask: the index of the last set bit plus one.
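As a small illustration of that computation, here is a userspace sketch with a one-word mask (the helper name is made up; the kernel's find_last_bit works on arbitrarily long bitmaps): with CPUs 0 and 1 possible the result is 2.

#include <stdio.h>

/* Index of the highest set bit, plus one (0 for an empty mask). */
static unsigned int last_set_bit_plus_one(unsigned long mask, unsigned int nbits)
{
        unsigned int i;

        for (i = nbits; i-- > 0; )
                if (mask & (1UL << i))
                        return i + 1;
        return 0;
}

int main(void)
{
        unsigned long cpu_possible_mask = 0x3;   /* CPUs 0 and 1 possible */

        printf("nr_cpu_ids = %u\n", last_set_bit_plus_one(cpu_possible_mask, 32));
        return 0;
}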
setup per cpu variables
A per-CPU variable is a feature of the Linux kernel: every CPU has its own copy of the variable, stored in a data section that belongs to it.
To understand per-CPU variables, let's check the declaration/definition macros for them:
per cpu variable declared/defined in include/linux/percpu-defs.h:78
/*
* Variant on the per-CPU variable declaration/definition theme used for
* ordinary per-CPU variables.
*/
#define DECLARE_PER_CPU(type, name) \
DECLARE_PER_CPU_SECTION(type, name, "")
#define DEFINE_PER_CPU(type, name) \
DEFINE_PER_CPU_SECTION(type, name, "")
DEFINE_PER_CPU_SECTION(type, name, "")
declared/defined in include/linux/percpu-defs.h:67
/*
* Normal declaration and definition macros.
*/
#define DECLARE_PER_CPU_SECTION(type, name, sec) \
extern __PCPU_ATTRS(sec) __typeof__(type) per_cpu__##name
#define DEFINE_PER_CPU_SECTION(type, name, sec) \
__PCPU_ATTRS(sec) PER_CPU_DEF_ATTRIBUTES \
__typeof__(type) per_cpu__##name
__PCPU_ATTRS(sec)
declared/defined in include/linux/percpu-defs.h:10
/*
* Base implementations of per-CPU variable declarations and definitions, where
* the section in which the variable is to be placed is provided by the
* 'sec' argument. This may be used to affect the parameters governing the
* variable's storage.
*
* NOTE! The sections for the DECLARE and for the DEFINE must match, lest
* linkage errors occur due the compiler generating the wrong code to access
* that section.
*/
#define __PCPU_ATTRS(sec) \
__attribute__((section(PER_CPU_BASE_SECTION sec))) \
PER_CPU_ATTRIBUTES
PER_CPU_BASE_SECTION
declared/defined in include/asm-generic/percpu.h:73; here we suppose SMP is configured.
#ifdef CONFIG_SMP
#define PER_CPU_BASE_SECTION ".data.percpu"
#else
#define PER_CPU_BASE_SECTION ".data"
#endif
PER_CPU_ATTRIBUTES
declared/defined in include/asm-generic/percpu.h:99
#ifndef PER_CPU_ATTRIBUTES
#define PER_CPU_ATTRIBUTES
#endif
If we declare a variable with type int and name x, after the macro is expanded the actual result is as follows:
extern __attribute__((section(".data.percpu" ""))) int per_cpu__x
setup_per_cpu_areas is used to set up the per-CPU variable areas. The default function for allocating the per-CPU area is pcpu_embed_first_chunk, used when percpu_alloc is not given on the kernel command line. pcpu_embed_first_chunk allocates memory for all CPUs' allocation information and sets up the first chunk. After pcpu_embed_first_chunk completes, the offset and segment for every CPU are set up, and per-CPU variables such as x86_cpu_to_apicid and irq_stack_ptr are initialized.
Here is the debug information after pcpu_build_alloc_info completes in the function pcpu_embed_first_chunk, when I configure the number of CPUs as 2 (qemu-system-i386 -s -S -smp 2 disk.img).
(gdb) p ai
$11 = (struct pcpu_alloc_info *) 0xc2126ce0
(gdb) p *ai
$12 = {static_size = 1353816, reserved_size = 0, dyn_size = 22440,
unit_size = 2097152, atom_size = 2097152, alloc_size = 2097152,
__ai_size = 4096, nr_groups = 1, groups = 0xc2126d00}
(gdb) p ai->groups
$13 = 0xc2126d00
(gdb) p *ai->groups
$14 = {nr_units = 2, base_offset = 0, cpu_map = 0xc2126d0c}
After the allocation information is allocated successfully, memory is further allocated for the group and its CPUs, and the contents of .data.percpu are copied into each per-CPU segment.
(gdb) n
pcpu_embed_first_chunk (reserved_size=reserved_size@entry=0,
dyn_size=dyn_size@entry=20480, atom_size=2097152,
cpu_distance_fn=cpu_distance_fn@entry=0xc1705aa1 <pcpu_cpu_distance>,
alloc_fn=alloc_fn@entry=0xc1705aca <pcpu_fc_alloc>,
free_fn=free_fn@entry=0xc1705ab5 <pcpu_fc_free>) at mm/percpu.c:1876
1876 if (IS_ERR(ai))
(gdb)
1879 size_sum = ai->static_size + ai->reserved_size + ai->dyn_size;
(gdb)
1880 areas_size = PFN_ALIGN(ai->nr_groups * sizeof(void *));
(gdb)
1882 areas = alloc_bootmem_nopanic(areas_size);
(gdb)
1883 if (!areas) {
(gdb)
1891 unsigned int cpu = NR_CPUS;
(gdb)
1894 for (i = 0; i < gi->nr_units && cpu == NR_CPUS; i++)
(gdb)
1895 cpu = gi->cpu_map[i];
(gdb)
1899 ptr = alloc_fn(cpu, gi->nr_units * ai->unit_size, atom_size);
(gdb)
1900 if (!ptr) {
(gdb)
1904 areas[group] = ptr;
(gdb)
1906 base = min(ptr, base);
(gdb)
1908 for (i = 0; i < gi->nr_units; i++, ptr += ai->unit_size) {
(gdb)
1909 if (gi->cpu_map[i] == NR_CPUS) {
(gdb)
1915 memcpy(ptr, __per_cpu_load, ai->static_size);
(gdb)
There will be N copies of .data.percpu after pcpu_embed_first_chunk completes, where N is the number of CPUs in the system.
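One way to picture the result is the following conceptual sketch (userspace only, not the kernel's implementation): the linked .data.percpu section acts as a template, each CPU gets its own copy of it, and a per-CPU variable is reached by adding that CPU's offset to the variable's link-time address.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NR_CPUS 2

/* Pretend this is a variable living in the .data.percpu "template". */
static int per_cpu__x = 42;

/* One offset per CPU, pointing into that CPU's private copy of the section. */
static long per_cpu_offset[NR_CPUS];

#define per_cpu(var, cpu) \
        (*(int *)((char *)&(var) + per_cpu_offset[(cpu)]))

int main(void)
{
        int cpu;

        /* "First chunk" setup: give every CPU its own copy of the template. */
        for (cpu = 0; cpu < NR_CPUS; cpu++) {
                char *copy = malloc(sizeof(per_cpu__x));
                memcpy(copy, &per_cpu__x, sizeof(per_cpu__x));
                per_cpu_offset[cpu] = copy - (char *)&per_cpu__x;
        }

        per_cpu(per_cpu__x, 0) = 0;      /* touches CPU 0's copy only */
        per_cpu(per_cpu__x, 1) = 1;      /* touches CPU 1's copy only */
        printf("cpu0=%d cpu1=%d template=%d\n",
               per_cpu(per_cpu__x, 0), per_cpu(per_cpu__x, 1), per_cpu__x);
        return 0;
}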
prepare for boot cpu
Do some preparation for the boot CPU.
void __init native_smp_prepare_boot_cpu(void)
{
        int me = smp_processor_id();

        switch_to_new_gdt(me);
        /* already set me in cpu_online_mask in boot_cpu_init() */
        cpumask_set_cpu(me, cpu_callout_mask);
        per_cpu(cpu_state, me) = CPU_ONLINE;
}
Inside native_smp_prepare_boot_cpu, the id of the boot CPU is obtained with smp_processor_id, the GDT and data segment are loaded with switch_to_new_gdt, and the CPU masks are updated for the boot CPU (as the comment notes, it was already marked online in boot_cpu_init).
create node and zone for memory management
Linux has a structure describing memory which is used to keep account of memory banks, pages and the flags that affect VM behaviour.
The first principal concept prevalent in the VM is Non-Uniform Memory Access (NUMA). With large scale machines, memory may be arranged into banks that incur a different cost to access depending on the “distance” from the processor. For example, there might be a bank of memory assigned to each CPU or a bank of memory very suitable for DMA near device cards.
Each bank is called a node and the concept is represented under Linux by a struct pglist_data even if the architecture is UMA. This struct is always referred to by its typedef pg_data_t. Every node in the system is kept on a NULL terminated list called pgdat_list and each node is linked to the next with the field pg_data_t->node_next. For UMA architectures like PC desktops, only one static pg_data_t structure called contig_page_data is used.
Each node is divided up into a number of blocks called zones which represent ranges within memory. Zones should not be confused with zone based allocators as they are unrelated. A zone is described by a struct zone_struct, typedeffed to zone_t, and each one is of type ZONE_DMA, ZONE_NORMAL or ZONE_HIGHMEM. Each zone type is suitable for a different type of usage. ZONE_DMA is memory in the lower physical memory ranges which certain ISA devices require. Memory within ZONE_NORMAL is directly mapped by the kernel into the upper region of the linear address space. ZONE_HIGHMEM is the remaining available memory in the system and is not directly mapped by the kernel.
build_all_zonelists checks the system state; if the system is booting, it builds all zonelists with __build_all_zonelists, which involves build_zonelists_node to add every zone with present pages to the zonelist. build_all_zonelists then updates the variable vm_total_pages, the total number of pages, after accounting for the free RAM allocatable within all zones.
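The core of build_zonelists_node can be sketched as follows (simplified zone struct, a single UMA node, made-up page counts): zones that have present pages are appended to a fallback list from the highest zone downwards, so an allocation aimed at ZONE_HIGHMEM can fall back to ZONE_NORMAL and then ZONE_DMA.

#include <stdio.h>

enum zone_type { ZONE_DMA, ZONE_NORMAL, ZONE_HIGHMEM, MAX_NR_ZONES };

struct zone {
        enum zone_type type;
        unsigned long present_pages;
};

/* Append zones with present pages, from the highest requested zone down. */
static int build_zonelist(struct zone *zones, struct zone **list, int highest)
{
        int n = 0;
        int t = highest + 1;

        do {
                t--;
                if (zones[t].present_pages)
                        list[n++] = &zones[t];
        } while (t > 0);
        return n;
}

int main(void)
{
        static const char *names[] = { "DMA", "Normal", "HighMem" };
        struct zone zones[MAX_NR_ZONES] = {
                [ZONE_DMA]     = { ZONE_DMA,     4096 },
                [ZONE_NORMAL]  = { ZONE_NORMAL,  221184 },
                [ZONE_HIGHMEM] = { ZONE_HIGHMEM, 0 },       /* empty zone */
        };
        struct zone *list[MAX_NR_ZONES];
        int i, n = build_zonelist(zones, list, ZONE_HIGHMEM);

        for (i = 0; i < n; i++)
                printf("fallback %d: %s\n", i, names[list[i]->type]);
        return 0;
}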
register callback function for CPU up/down
page_alloc_init involves hotcpu_notifier to register the callback function page_alloc_cpu_notify, so the page allocator learns about CPUs going up/down when HOTPLUG is supported.
hotcpu_notifier is a macro; its definition can be found in include/linux/cpu.h:113
#define hotcpu_notifier(fn, pri) cpu_notifier(fn, pri)
cpu_notifier
can be found in include/linux/cpu.h:52
#define cpu_notifier(fn, pri) { \
static struct notifier_block fn##_nb __cpuinitdata = \
{ .notifier_call = fn, .priority = pri }; \
register_cpu_notifier(&fn##_nb); \
}
In Linux a new mutex is defined with the macro DEFINE_MUTEX. From the comment above the mutex struct its principle is clear: a mutex is used to protect a critical region.
include/linux/mutex.h:97
#define __MUTEX_INITIALIZER(lockname) \
{ .count = ATOMIC_INIT(1) \
, .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \
, .wait_list = LIST_HEAD_INIT(lockname.wait_list) \
__DEBUG_MUTEX_INITIALIZER(lockname) \
__DEP_MAP_MUTEX_INITIALIZER(lockname) }
#define DEFINE_MUTEX(mutexname) \
struct mutex mutexname = __MUTEX_INITIALIZER(mutexname)
include/linux/mutex.h:48
/*
* Simple, straightforward mutexes with strict semantics:
*
* - only one task can hold the mutex at a time
* - only the owner can unlock the mutex
* - multiple unlocks are not permitted
* - recursive locking is not permitted
* - a mutex object must be initialized via the API
* - a mutex object must not be initialized via memset or copying
* - task may not exit with mutex held
* - memory areas where held locks reside must not be freed
* - held mutexes must not be reinitialized
* - mutexes may not be used in hardware or software interrupt
* contexts such as tasklets and timers
*
* These semantics are fully enforced when DEBUG_MUTEXES is
* enabled. Furthermore, besides enforcing the above rules, the mutex
* debugging code also implements a number of additional features
* that make lock debugging easier and faster:
*
* - uses symbolic names of mutexes, whenever they are printed in debug output
* - point-of-acquire tracking, symbolic lookup of function names
* - list of all locks held in the system, printout of them
* - owner tracking
* - detects self-recursing locks and prints out all relevant info
* - detects multi-task circular deadlocks and prints out all affected
* locks and tasks (and only those tasks)
*/
struct mutex {
        /* 1: unlocked, 0: locked, negative: locked, possible waiters */
        atomic_t count;
        spinlock_t wait_lock;
        struct list_head wait_list;
        ...
};
Inside register_cpu_notifier, the mutex cpu_add_remove_lock is locked to protect the data cpu_chain. cpu_maps_update_begin involves mutex_lock with a reference to cpu_add_remove_lock as its input parameter.
void cpu_maps_update_begin(void)
{
        mutex_lock(&cpu_add_remove_lock);
}
Here is the definition of the function mutex_lock; it changes count from 1 to 0 with __mutex_fastpath_lock and sets the current task as the owner of the lock with mutex_set_owner.
/***
* mutex_lock - acquire the mutex
* @lock: the mutex to be acquired
*
* Lock the mutex exclusively for this task. If the mutex is not
* available right now, it will sleep until it can get it.
*
* The mutex must later on be released by the same task that
* acquired it. Recursive locking is not allowed. The task
* may not exit without first unlocking the mutex. Also, kernel
* memory where the mutex resides mutex must not be freed with
* the mutex still locked. The mutex must first be initialized
* (or statically defined) before it can be locked. memset()-ing
* the mutex to 0 is not allowed.
*
* ( The CONFIG_DEBUG_MUTEXES .config option turns on debugging
* checks that will enforce the restrictions and will also do
* deadlock debugging. )
*
* This function is similar to (but not equivalent to) down().
*/
void __sched mutex_lock(struct mutex *lock)
{
        might_sleep();
        /*
         * The locking fastpath is the 1->0 transition from
         * 'unlocked' into 'locked' state.
         */
        __mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath);
        mutex_set_owner(lock);
}
After registration completes, the lock is released with cpu_maps_update_done, which involves mutex_unlock to release the mutex.
void cpu_maps_update_done(void)
{
        mutex_unlock(&cpu_add_remove_lock);
}
mutex_unlock changes count from 0 to 1 with __mutex_fastpath_unlock.
/***
* mutex_unlock - release the mutex
* @lock: the mutex to be released
*
* Unlock a mutex that has been locked by this task previously.
*
* This function must not be used in interrupt context. Unlocking
* of a not locked mutex is not allowed.
*
* This function is similar to (but not equivalent to) up().
*/
void __sched mutex_unlock(struct mutex *lock)
{
        /*
         * The unlocking fastpath is the 0->1 transition from 'locked'
         * into 'unlocked' state:
         */
#ifndef CONFIG_DEBUG_MUTEXES
        /*
         * When debugging is enabled we must not clear the owner before time,
         * the slow path will always be taken, and that clears the owner field
         * after verifying that it was indeed current.
         */
        mutex_clear_owner(lock);
#endif
        __mutex_fastpath_unlock(&lock->count, __mutex_unlock_slowpath);
}
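Both fastpaths amount to a single atomic operation on count. A rough userspace sketch of the idea using C11 atomics follows; the kernel's real fastpaths are architecture-specific, and the slowpaths here are only placeholders for queueing on wait_list and waking waiters.

#include <stdio.h>
#include <stdatomic.h>

struct mutex_sketch {
        atomic_int count;       /* 1: unlocked, 0: locked, <0: waiters */
};

static void lock_slowpath(struct mutex_sketch *lock)
{
        (void)lock;             /* real code would sleep on wait_list */
        printf("contended: would take the lock slowpath\n");
}

static void lock_sketch(struct mutex_sketch *lock)
{
        /* Like __mutex_fastpath_lock: if the count did not go from 1 to 0,
         * the mutex was already held and we must take the slowpath. */
        if (atomic_fetch_sub(&lock->count, 1) < 1)
                lock_slowpath(lock);
}

static void unlock_sketch(struct mutex_sketch *lock)
{
        /* Like __mutex_fastpath_unlock: a previous value below zero means
         * there are waiters to wake up in the slowpath. */
        if (atomic_fetch_add(&lock->count, 1) < 0)
                printf("contended: would wake a waiter\n");
}

int main(void)
{
        struct mutex_sketch m = { ATOMIC_VAR_INIT(1) };

        lock_sketch(&m);        /* 1 -> 0: uncontended acquire */
        unlock_sketch(&m);      /* 0 -> 1: uncontended release */
        return 0;
}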
register_cpu_notifier involves raw_notifier_chain_register with a reference to cpu_chain and the callback function as input parameters; raw_notifier_chain_register further involves notifier_chain_register to complete the registration. notifier_chain_register loops over the notifier chain to find the proper place to add the new notifier callback function.
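That loop can be sketched with simplified types as follows (the real struct notifier_block has a different callback signature and is updated RCU-safely): the chain is kept sorted by priority, and the new block is inserted before the first existing entry whose priority is lower.

#include <stdio.h>

struct notifier_block {
        int (*notifier_call)(void *data);
        struct notifier_block *next;
        int priority;
};

/* Insert the new block before the first entry with a lower priority,
 * keeping the list sorted from high to low priority. */
static void chain_register(struct notifier_block **head,
                           struct notifier_block *n)
{
        while (*head && (*head)->priority >= n->priority)
                head = &(*head)->next;
        n->next = *head;
        *head = n;
}

static int low_prio_notify(void *data)  { (void)data; return 0; }
static int high_prio_notify(void *data) { (void)data; return 0; }

int main(void)
{
        struct notifier_block *chain = NULL;
        struct notifier_block a = { low_prio_notify,  NULL, 0 };
        struct notifier_block b = { high_prio_notify, NULL, 10 };
        struct notifier_block *p;

        chain_register(&chain, &a);
        chain_register(&chain, &b);     /* higher priority ends up first */

        for (p = chain; p; p = p->next)
                printf("priority %d\n", p->priority);
        return 0;
}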